Today we're going to discuss a famous classification algorithm called K Nearest Neighbors, or KNN. The name might look a little intimidating, but trust me, it's actually a very simple, very intuitive algorithm. In this blog, I'm going to walk you through a practical example so we can take a close look at how the nearest neighbors idea works.
KNN is a classification algorithm, so we expect the output to be categorical (often just binary). For example: predicting a T-shirt size of small, medium, or large; predicting whether someone is healthy or sick; predicting whether a student is going to pass or fail; and so on.
That's the kind of output we expect when we apply K Nearest Neighbors. The algorithm works by finding the most similar data points in the training data and making an educated guess based on how those neighbors are classified.
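To make that concrete, here is a minimal from-scratch sketch of the idea (the function name, the toy data, and k=3 are my own choices for illustration): measure the distance from a new point to every training point, take the k closest, and let them vote on the label.

import numpy as np
from collections import Counter

def knn_predict(X_train, y_train, x_new, k=3):
    # Euclidean distance from the new point to every training point
    distances = np.linalg.norm(X_train - x_new, axis=1)
    # indices of the k closest training points
    nearest = np.argsort(distances)[:k]
    # majority vote among the labels of those neighbors
    return Counter(y_train[nearest]).most_common(1)[0][0]

# toy data: [height in cm, weight in kg] -> T-shirt size
X_demo = np.array([[158, 58], [160, 59], [170, 68], [172, 70]])
y_demo = np.array(['S', 'S', 'L', 'L'])
print(knn_predict(X_demo, y_demo, np.array([168, 66])))  # 'L'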
If that still sounds abstract, don't worry. Let's take a look at a full example, and it will be much clearer afterwards.
You own an online clothing business and you would like to develop a new app (or in-store) feature in which customers enter their height and weight and the system predicts what T-shirt size they should wear. The features are height and weight, and the output is either L (Large) or S (Small).
So picture the in-store version of the feature: a customer walks in and provides us with two inputs, their weight in kilograms and their height in centimeters. Those two features go into the algorithm, and based on them it should predict whether to hand the customer size small or size large.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import os
os.chdir("D:\\Python")  # folder that contains the dataset

# load the T-shirt sizing data
Tshirt = pd.read_csv("Tshirt_Sizing_Dataset.csv")
Tshirt.head(10)
| | Height (in cms) | Weight (in kgs) | T Shirt Size |
|---|---|---|---|
| 0 | 158 | 58 | S |
| 1 | 158 | 59 | S |
| 2 | 158 | 63 | S |
| 3 | 160 | 59 | S |
| 4 | 160 | 60 | S |
| 5 | 163 | 60 | S |
| 6 | 163 | 61 | S |
| 7 | 160 | 64 | L |
| 8 | 163 | 64 | L |
| 9 | 165 | 61 | L |
# number of missing values by variables
Tshirt.isnull().sum()
Height (in cms)    0
Weight (in kgs)    0
T Shirt Size       0
dtype: int64
# features (height and weight) and target (T-shirt size)
X = Tshirt.drop("T Shirt Size", axis=1)
y = Tshirt.loc[:, "T Shirt Size"]

# encode the string labels ('L'/'S') as integers
from sklearn.preprocessing import LabelEncoder
labelencoder_y = LabelEncoder()
y = labelencoder_y.fit_transform(y)
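A quick note on what the encoder did: LabelEncoder assigns integer codes to the classes in sorted order, so here 'L' becomes 0 and 'S' becomes 1. You can always check:

print(labelencoder_y.classes_)                    # ['L' 'S'] -> codes 0 and 1
print(labelencoder_y.inverse_transform([0, 1]))   # ['L' 'S']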
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
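One caveat: without a random_state, train_test_split shuffles differently on every run, so the exact numbers below will change if you rerun the notebook. If you want reproducible results, you can pin the seed (the value 0 here is an arbitrary choice):

# reproducible split; 0 is an arbitrary seed
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)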
# Feature Scaling
# from sklearn.preprocessing import StandardScaler
# sc = StandardScaler()
# X_train = sc.fit_transform(X_train)
# X_test = sc.transform(X_test)
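The scaling step above is commented out, but it's worth understanding why it exists: KNN is a distance-based algorithm, so a feature with a larger numeric range can dominate the distance. Here heights (around 160 cm) and weights (around 60 kg) are on comparable scales, so the model works reasonably without scaling, but in general you would standardize. Here is a sketch of the scaled variant, using a scikit-learn Pipeline so the scaler is fit only on the training data:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

# standardize the features, then classify; predict() applies the same scaling
scaled_knn = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=5))
scaled_knn.fit(X_train, y_train)

Predictions would then go through scaled_knn.predict(X_test), exactly like the unscaled classifier below.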
# Fitting K-NN to the Training set
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
classifier.fit(X_train, y_train)
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=5, p=2, weights='uniform')
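A note on those parameters: the Minkowski metric with p = 2 is simply the Euclidean distance (with p = 1 it would be the Manhattan distance). For example, the distance between a customer at (158 cm, 58 kg) and one at (160 cm, 59 kg) works out to:

import numpy as np
a, b = np.array([158, 58]), np.array([160, 59])
print(np.linalg.norm(a - b))  # sqrt(2**2 + 1**2) = sqrt(5) ≈ 2.236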
# Predicting the Test set results
y_pred = classifier.predict(X_test)
y_pred
array([0, 1, 0, 1, 0], dtype=int64)
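Those 0s and 1s are the encoded labels, so it helps to translate them back. We can also answer the original business question for a single new customer (the 167 cm / 62 kg values are made up for illustration):

# decode the test-set predictions back to 'L'/'S'
print(labelencoder_y.inverse_transform(y_pred))   # ['L' 'S' 'L' 'S' 'L']

# predict the size for one hypothetical walk-in customer
new_customer = pd.DataFrame([[167, 62]], columns=X.columns)
print(labelencoder_y.inverse_transform(classifier.predict(new_customer)))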
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt="d")
<matplotlib.axes._subplots.AxesSubplot at 0x20e1ae42a90>
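The diagonal cells of the confusion matrix are the correct predictions, so the accuracy is the diagonal sum divided by the total. scikit-learn computes it directly:

from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, y_pred))   # same as cm.trace() / cm.sum()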
# Visualising the Training set results
from matplotlib.colors import ListedColormap
X_grid, y_grid = X_train.values, y_train  # .values: the positional indexing below needs a NumPy array
X1, X2 = np.meshgrid(np.arange(start = X_grid[:, 0].min() - 1, stop = X_grid[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_grid[:, 1].min() - 1, stop = X_grid[:, 1].max() + 1, step = 0.01))
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_grid)):
    plt.scatter(X_grid[y_grid == j, 0], X_grid[y_grid == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Training dataset')
plt.xlabel('Height (in cms)')
plt.ylabel('Weight (in kgs)')
plt.legend()
plt.show()
# Visualising the Test set results
from matplotlib.colors import ListedColormap
X_grid, y_grid = X_test.values, y_test  # same fix: convert the DataFrame to a NumPy array
X1, X2 = np.meshgrid(np.arange(start = X_grid[:, 0].min() - 1, stop = X_grid[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_grid[:, 1].min() - 1, stop = X_grid[:, 1].max() + 1, step = 0.01))
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_grid)):
    plt.scatter(X_grid[y_grid == j, 0], X_grid[y_grid == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Testing dataset')
plt.xlabel('Height (in cms)')
plt.ylabel('Weight (in kgs)')
plt.legend()
plt.show()
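One last thing: this walkthrough fixed n_neighbors = 5 up front. In practice you would try a range of k values and pick the one with the lowest test error. A common sketch (the range 1–14 is an arbitrary choice):

# error rate on the test set for each candidate k
error_rate = []
for k in range(1, 15):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    error_rate.append(np.mean(knn.predict(X_test) != y_test))

plt.plot(range(1, 15), error_rate, marker='o')
plt.title('Error rate vs. k')
plt.xlabel('k (n_neighbors)')
plt.ylabel('error rate')
plt.show()

With only a handful of test points, this curve will be noisy; cross-validation would give a steadier estimate.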